/* create datasets for regressions */
/* Assign libref EDU to the replication data folder. The leading !userprofile
   is presumably expanded by SAS from the Windows environment variable of the
   same name -- TODO confirm on the machine that runs this script. */
libname edu '!userprofile\\Dropbox\Education\Replication\Data';

/* Copy edu.nsf4 into WORK.nsf, retaining only the five listed columns. */
data nsf;
  set edu.nsf4;
  keep year_ending major bachelor bachelor_male bachelor_female;
run;

/* Merge to get the major code: match nsf.major against the majortitle column
   of edu.nsf_majors and bring back its major column as majorcode.
   This is an inner join, so rows of nsf whose major has no matching
   majortitle are dropped.
   The result is written to a scratch table first: letting PROC SQL create a
   table that its own FROM clause reads triggers a recursive-reference
   warning about possible data integrity problems. */
proc sql;
  create table nsf_coded as
  select a.*, b.major as majorcode
  from nsf as a
  inner join edu.nsf_majors as b
    on a.major = b.majortitle;
quit;

/* Put the merged rows back under the original name, then drop the scratch
   table. If the join step failed, this DATA step errors out and the existing
   nsf is left untouched. */
data nsf;
  set nsf_coded;
run;

proc datasets library = work nolist;
  delete nsf_coded;
quit;

/* Calculate log enrolment for the three bachelor columns.
   LOG is only called for strictly positive values. Zero, negative or missing
   inputs leave the log variable missing, which is the same value the
   unguarded call ends up with, but without invalid-argument notes and the
   _ERROR_ flag being raised for every such row. */
data nsf;
  set nsf;
  if bachelor > 0 then log_bachelor = log(bachelor);
  if bachelor_male > 0 then log_bachelor_male = log(bachelor_male);
  if bachelor_female > 0 then log_bachelor_female = log(bachelor_female);
run;

/* Take a copy of edu.industry_skew_major named skew1. The steps that follow
   add the suffix _major to its variable names. */
data skew1;
  set edu.industry_skew_major;
run;

/* Build the text "var = var_major" for every column of WORK.SKEW1 except
   year and major, and store it in macro variable renstr. The two excluded
   columns keep their names, so nothing has to be renamed back afterwards.
   NOPRINT keeps the generated strings out of the listing output. */
proc sql noprint;
  select cat(name, ' = ', cats(name, '_major'))
    into :renstr separated by ' '
  from dictionary.columns
  where libname = 'WORK'
    and memname = 'SKEW1'
    and upcase(name) not in ('YEAR', 'MAJOR');
quit;

/* Apply the generated rename list while reading skew1.
   Assumes skew1 has at least one column besides year and major, otherwise
   the rename list is empty and the step fails. */
data skew2;
  set skew1 (rename = (&renstr));
run;

/* Save skew2 as the permanent dataset edu.skew, sorted by year and major.
   OUT= writes the sorted rows straight into the permanent library, so the
   dataset is written once instead of being copied and then re-sorted in
   place, and no unsorted permanent copy is left behind if the sort fails. */
proc sort data = skew2 out = edu.skew;
  by year major;
run;

/* Save the working table nsf as the permanent dataset edu.enrolment. */
data edu.enrolment;
  set nsf;
run;

